#!/usr/bin/python3
import os

import bs4
import requests

# Novel scraper: starting from one chapter page, download each chapter,
# append its text to a local file, then follow the page's "next chapter"
# link until no further chapter can be found.
print('---start---\n')

# Site root and the site-relative path of the first chapter to fetch.
webRoot = 'http://www.biqukan.com'
nextUrl = '/1_1408/15948540.html'
# Output text file; it is recreated from scratch on every run.
filePath = '/Users/liut/myTxt.txt'

if os.path.exists(filePath):
    os.remove(filePath)

# An explicit UTF-8 encoding keeps the Chinese text writable regardless of
# the platform's default locale; the `with` block guarantees the file is
# closed even if a request or a parse step raises part-way through.
with open(filePath, 'w', encoding='utf-8') as file:
    while nextUrl is not None:
        pageUrl = webRoot + nextUrl
        print('开始抓取: ' + pageUrl)

        # GET the page. The timeout prevents hanging forever on a stalled
        # connection, and raise_for_status() stops us from parsing an HTTP
        # error page as if it were a chapter.
        resp = requests.get(pageUrl, timeout=30)
        resp.raise_for_status()
        # When the server declares no charset, requests falls back to
        # ISO-8859-1 for text responses, which would garble Chinese text;
        # use the encoding detected from the body instead.
        if 'charset' not in resp.headers.get('Content-Type', '').lower():
            resp.encoding = resp.apparent_encoding

        # Parse the page with bs4.
        soup = bs4.BeautifulSoup(resp.text, "html.parser")

        content = soup.find(name='div', class_='showtxt')
        if content is None:
            # Not a chapter page (e.g. the link led to an index page):
            # there is nothing more to save, so stop cleanly.
            break

        # Paragraphs are indented with either 8 non-breaking spaces or
        # 2 full-width spaces; turn both markers into blank-line breaks.
        showtxt = content.text \
            .replace('\xa0' * 8, '\n\n') \
            .replace('　　', '\n\n')

        title = soup.title.text if soup.title is not None else ''
        finalTxt = '\n《' + title + '》\n\n' + \
                   showtxt + \
                   "\n\n==========本章完==========\n\n"
        file.write(finalTxt)
        print('成功抓取： ' + title)

        # Follow the "next chapter" link; a missing link (or a link with
        # no href) yields None, which ends the loop.
        nextSoup = soup.find('a', text='下一章')
        nextUrl = nextSoup.get('href') if nextSoup is not None else None

print('抓取完成')
